Get data from kosis

http://kosis.kr/ups/ups_01List.jsp?pubcode=AD 또는 http://kosis.kr/upsHtml/online.do?isOnline=Y&PART=G&pubcode=AD 에서 원하는 xlsx 파일(예: http://kosis.kr/upsHtml/online/downSrvcFile.do?PUBCODE=AD&SEQ=2959&FILE_NAME=030117.xlsx)을 내려받아 data/xls에 저장한 후 읽어들인다.

# Load every package the notebook relies on up front. library() fails loudly
# when a package is missing, whereas require() only returns FALSE and lets the
# script die later with a confusing "could not find function" error.
library(readxl)
library(dplyr)    # select(), filter(), arrange(), desc(), %>% used below
library(ggplot2)  # ggplot(), aes(), geom_histogram() used below

# Read the KOSIS migration workbook that was saved manually under data/xls.
xls_file <- "data/xls/2015년_030117.xlsx"
xdf <- read_excel(xls_file)

# Quick look at the first rows and the available column names.
head(xdf)
colnames(xdf)
 [1] "전출지"       "전입지"       "계"           "0~4세"        "5~9세"        "10~14세"     
 [7] "15~19세"      "20~24세"      "25~29세"      "30~34세"      "35~39세"      "40~44세"     
[13] "45~49세"      "50~54세"      "55~59세"      "60~64세"      "65~69세"      "70~74세"     
[19] "75~79세"      "80세이상"     "남(계)"       "남(0~4세)"    "남(5~9세)"    "남(10~14세)" 
[25] "남(15~19세)"  "남(20~24세)"  "남(25~29세)"  "남(30~34세)"  "남(35~39세)"  "남(40~44세)" 
[31] "남(45~49세)"  "남(50~54세)"  "남(55~59세)"  "남(60~64세)"  "남(65~69세)"  "남(70~74세)" 
[37] "남(75~79세)"  "남(80세이상)" "여(계)"       "여(0~4세)"    "여(5~9세)"    "여(10~14세)" 
[43] "여(15~19세)"  "여(20~24세)"  "여(25~29세)"  "여(30~34세)"  "여(35~39세)"  "여(40~44세)" 
[49] "여(45~49세)"  "여(50~54세)"  "여(55~59세)"  "여(60~64세)"  "여(65~69세)"  "여(70~74세)" 
[55] "여(75~79세)"  "여(80세이상)" "전출지코드"   "전입지코드"  

전입지, 전출지, 계와 남녀별 합계(남(계), 여(계))만 남기고 다른 컬럼들은 무시한다. 전입지, 전출지 중에서 시군구 레벨만 남긴다(세종은 예외로 포함). 같은 지역 안에서의 이동은 제외하고 외부로의 이동만 포함한다.

# Build the origin -> destination flow table.
#   from   = "전출지": the place people moved OUT of (origin)
#   to     = "전입지": the place people moved INTO (destination)
#   value  = "계":     total movers
#   mvalue = "남(계)": male total   (column 21 of the sheet)
#   fvalue = "여(계)": female total (column 39 of the sheet)
# from/to must follow the origin/destination order, otherwise every flow in
# the chord diagram and the network graph points the wrong way. Columns are
# picked by name rather than by position so a changed sheet layout fails
# loudly instead of silently selecting the wrong column.
df <- xdf %>%
  select(
    from = "전출지",
    to = "전입지",
    value = "계",
    mvalue = "남(계)",
    fvalue = "여(계)"
  ) %>%
  # Keep region names longer than two characters, plus "세종" explicitly.
  # NOTE(review): presumably two-character names are province-level rows;
  # confirm that no si/gun/gu row has a two-character name.
  filter(from == "세종" | nchar(from) > 2) %>%
  filter(to == "세종" | nchar(to) > 2) %>%
  # Drop rows where origin and destination are the same region.
  filter(from != to)
head(df)

제일 많이 빠져나간 곳은 어딜까?

# Show the largest flows first.
sdf <- df %>% arrange(desc(value))
head(sdf)

# Re-derive sdf as the flows above 1000 movers and plot their distribution
# in bins of 500.
sdf <- df %>% filter(value > 1000)
ggplot(sdf, aes(value)) +
  geom_histogram(binwidth = 500)

남녀 간 이동의 차이

# Histogram of the male-minus-female mover count per flow, in bins of 5.
# coord_cartesian() zooms the y axis to 0-1000 without touching the data;
# ylim() would set scale limits instead, which removes any bar taller than
# 1000 from the plot rather than clipping it.
ggplot(df, aes(mvalue - fvalue)) +
  geom_histogram(binwidth = 5) +
  coord_cartesian(ylim = c(0, 1000))

# Average male-minus-female difference across all flows.
mean(df$mvalue - df$fvalue)
[1] 2.624549
# Chord diagram of the flows above 2000 movers, ordered by the `from` column.
sdf <- filter(df, value > 2000) %>% arrange(from)

# library() errors immediately if circlize is missing; require() would only
# return FALSE and let chordDiagram() fail afterwards.
library(circlize)

# Font with Hangul glyphs for the sector labels.
# NOTE(review): this family name looks macOS-specific; confirm on other OSes.
par(family = "Apple SD Gothic Neo")

# Pass only from / to / value as a plain data.frame. chordDiagram() reads the
# first two columns as the link endpoints and the numeric column(s) after them
# as link widths, so the extra mvalue / fvalue columns must not be handed over.
# NOTE(review): a plain data.frame presumably also avoids the tibble
# "Unknown column" message this call printed; confirm with the installed
# circlize version.
chordDiagram(
  as.data.frame(sdf[, c("from", "to", "value")]),
  annotationTrack = "grid",
  preAllocateTracks = list(track.height = 0.3)
)
Unknown column 'rank'
# Revisit the pre-allocated first track and draw each sector's name on it.
circos.trackPlotRegion(
  track.index = 1,
  # The track only carries labels, so its cell borders are not drawn.
  bg.border = NA,
  panel.fun = function(x, y) {
    sector_label <- get.cell.meta.data("sector.index")
    x_range <- get.cell.meta.data("xlim")
    y_range <- get.cell.meta.data("ylim")
    # Anchor the text at the horizontal middle and the bottom of the cell.
    circos.text(
      mean(x_range), y_range[1], sector_label,
      facing = "clockwise",
      niceFacing = TRUE,
      adj = c(0, 0.5)
    )
  }
)

네트워크로 표현해볼까.

# Build a graph from the flows above 1000 movers, ordered by `from`.
library(igraph)
ndf <- df %>%
  filter(value > 1000) %>%
  arrange(from)
# The plotting step reads the `value` column back as an edge attribute via
# E(g)$value.
g <- graph_from_data_frame(ndf)

그려보자

# Draw the migration network with edge width proportional to the flow size.
par(family = "Apple SD Gothic Neo")

# Scale factor that turns mover counts into plot line widths.
edgeScale <- 0.002

# NOTE(review): igraph documents edge.arrow.size as a single constant and may
# use only the first element of the vector passed here; confirm whether
# per-edge arrow sizes actually take effect.
plot(
  g,
  vertex.size = 2,
  vertex.label.family = "Apple SD Gothic Neo",
  vertex.label.cex = 2,
  edge.width = E(g)$value * edgeScale,
  edge.arrow.size = E(g)$value * edgeScale * 0.4,
  edge.color = rgb(0.2, 0.2, 0.2, 0.2)
)

LS0tCnRpdGxlOiAi7ZWc6rWt7J2YIOyduOq1rCDsnbTrj5kiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMjIEdldCBkYXRhIGZyb20ga29zaXMKCmh0dHA6Ly9rb3Npcy5rci91cHMvdXBzXzAxTGlzdC5qc3A/cHViY29kZT1BRApvcgpodHRwOi8va29zaXMua3IvdXBzSHRtbC9vbmxpbmUuZG8/aXNPbmxpbmU9WSZQQVJUPUcmcHViY29kZT1BRApodHRwOi8va29zaXMua3IvdXBzSHRtbC9vbmxpbmUvZG93blNydmNGaWxlLmRvP1BVQkNPREU9QUQmU0VRPTI5NTkmRklMRV9OQU1FPTAzMDExNy54bHN4CuybkO2VmOuKlCB4bHN47YyM7J287J2EIGRhdGEveGxz7JeQIOyggOyepe2VnCDtm4Qg7J297Ja065Ok7J2464ukLgoKYGBge3J9CnJlcXVpcmUocmVhZHhsKQp4bHNfZmlsZSA8LSAiZGF0YS94bHMvMjAxNeuFhF8wMzAxMTcueGxzeCIKeGRmIDwtIHJlYWRfZXhjZWwoeGxzX2ZpbGUpCmhlYWQoeGRmKQpjb2xuYW1lcyh4ZGYpCmBgYAoK7KCE7J6F7KeALCDsoITstpzsp4AsIOqzhOunjCDrgqjquLDqs6Ag64uk66W4IOy7rOufvOuTpOydgCDrrLTsi5ztlZzri6QuCuyghOyeheyngCwg7KCE7Lac7KeA7KSR7JeQ7IScIOyLnOq1sOq1rCDroIjrsqjrp4wg64Ko6ri064ukLiAK7Jm467aA66Gc7J2YIOydtOuPmeunjCDtj6ztlagKCmBgYHtyfQpkZiA8LSBzZWxlY3RfKHhkZiwgZnJvbT0i7KCE7J6F7KeAIiwgdG89IuyghOy2nOyngCIsIHZhbHVlPSLqs4QiLCBtdmFsdWU9MjEsIGZ2YWx1ZT0zOSkgJT4lCiAgZmlsdGVyKGZyb20gPT0gJ+yEuOyihScgfCBuY2hhcihmcm9tKT4yKSAlPiUKICBmaWx0ZXIodG8gPT0gJ+yEuOyihScgfCBuY2hhcih0byk+MikgJT4lCiAgZmlsdGVyKGZyb20gIT0gdG8gKQoKaGVhZChkZikKYGBgCgrsoJzsnbzrp47snbQg67mg7KC464KY6rCEIOqzs+ydgCDslrTrlJzquYw/CgpgYGB7cn0KZGYgPC0gYXJyYW5nZShkZiwgZGVzYyh2YWx1ZSkpICAKaGVhZChkZikKYGBgCgpgYGB7cn0Kc2RmIDwtIGZpbHRlcihkZiwgdmFsdWU+MTAwMCkKZ2dwbG90KHNkZiwgYWVzKHZhbHVlKSkgKyBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aD01MDApCmBgYAoK64Ko64WA6rCEIOydtOuPmeydmCDssKjsnbQKYGBge3J9CmdncGxvdChkZiwgYWVzKG12YWx1ZSAtIGZ2YWx1ZSkpICsgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGg9NSkgK3lsaW0oYygwLDEwMDApKQptZWFuKGRmJG12YWx1ZS1kZiRmdmFsdWUpCmBgYAoKYGBge3IgZmlnLmhlaWdodD0xMCwgZmlnLndpZHRoPTEwfQpzZGYgPC0gZmlsdGVyKGRmLCB2YWx1ZT4yMDAwKSAlPiUgYXJyYW5nZShmcm9tKQpyZXF1aXJlKGNpcmNsaXplKQpwYXIoZmFtaWx5PSdBcHBsZSBTRCBHb3RoaWMgTmVvJykKY2hvcmREaWFncmFtKHNkZiwgYW5ub3RhdGlvblRyYWNrID0gImdyaWQiLAogIHByZUFsbG9jYXRlVHJhY2tzID0gbGlzdCh0cmFjay5oZWlnaHQgPSAwLjMpKQoKIyB3ZSBnbyBiYWNrIHRvIHRoZSBmaXJzdCB0cmFjayBhbmQgY3VzdG9taXplIHNlY3RvciBsYWJlbHMK
Y2lyY29zLnRyYWNrUGxvdFJlZ2lvbih0cmFjay5pbmRleCA9IDEsIHBhbmVsLmZ1biA9IGZ1bmN0aW9uKHgsIHkpIHsKICB4bGltID0gZ2V0LmNlbGwubWV0YS5kYXRhKCJ4bGltIikKICB5bGltID0gZ2V0LmNlbGwubWV0YS5kYXRhKCJ5bGltIikKICBzZWN0b3IubmFtZSA9IGdldC5jZWxsLm1ldGEuZGF0YSgic2VjdG9yLmluZGV4IikKICBjaXJjb3MudGV4dChtZWFuKHhsaW0pLCB5bGltWzFdLCBzZWN0b3IubmFtZSwgZmFjaW5nID0gImNsb2Nrd2lzZSIsCiAgICBuaWNlRmFjaW5nID0gVFJVRSwgYWRqID0gYygwLCAwLjUpKQp9LCBiZy5ib3JkZXIgPSBOQSkgIyBoZXJlIHNldCBiZy5ib3JkZXIgdG8gTkEgaXMgaW1wb3J0YW50CmBgYAoK64Sk7Yq47JuM7YGs66GcIO2RnO2YhO2VtOuzvOq5jC4KCmBgYHtyfQpsaWJyYXJ5KGlncmFwaCkKbmRmIDwtIGZpbHRlcihkZiwgdmFsdWU+MTAwMCkgJT4lIGFycmFuZ2UoZnJvbSkKZyA8LSBncmFwaF9mcm9tX2RhdGFfZnJhbWUobmRmKQpgYGAKCuq3uOugpOuztOyekAoKYGBge3IgZmlnLmhlaWdodD0yMCwgZmlnLndpZHRoPTIwfQpwYXIoZmFtaWx5PSdBcHBsZSBTRCBHb3RoaWMgTmVvJykKZWRnZVNjYWxlID0gMC4wMDIKcGxvdChnLCB2ZXJ0ZXguc2l6ZT0yLCAKICAgICB2ZXJ0ZXgubGFiZWwuZmFtaWx5PSdBcHBsZSBTRCBHb3RoaWMgTmVvJywgdmVydGV4LmxhYmVsLmNleD0yLAogICAgIGVkZ2Uud2lkdGggPSBFKGcpJHZhbHVlICogZWRnZVNjYWxlLCBlZGdlLmFycm93LnNpemU9RShnKSR2YWx1ZSAqIGVkZ2VTY2FsZSAqIDAuNCwKICAgICBlZGdlLmNvbG9yID0gcmdiKDAuMiwwLjIsMC4yLC4yKSkKYGBgCgoKCgo=